Load in the Data

We set the seed to 1178 for reproducibility. This seed will remain the same for each model run.

# Session setup ----

# Ask R to turn string columns into factors by default when data frames are
# built (this affects the read.csv() calls that follow).
# NOTE(review): newer R releases are believed to warn when this option is set
# to TRUE -- confirm against the R version used for this analysis.
options(stringsAsFactors = TRUE)

# Fixed seed so that every model run draws the same random numbers.
set.seed(seed = 1178)

library(caret)
## Loading required package: lattice
## Loading required package: ggplot2

# NOTE(review): absolute, machine-specific path; the relative file names used
# when reading the CSVs resolve against this directory.
setwd(dir = "/Users/richardcoleman/Git/")

# Read the raw training and testing sets from the working directory.
# NOTE(review): blank cells and "#DIV/0!" entries are not declared as NA here,
# so columns containing them are kept by the NA filter below (the describe()
# output reports 0 missing for such columns).
dfTraining <- read.csv(file = "pml-training.csv", row.names = NULL)
dfTesting <- read.csv(file = "pml-testing.csv", row.names = NULL)

# Remove the "X" column from both data sets.
dfTraining[["X"]] <- NULL
dfTesting[["X"]] <- NULL

# Keep only the training columns that contain no NA values.
# Only dfTraining is filtered; dfTesting keeps all of its remaining columns.
dfTraining <- dfTraining[, !vapply(dfTraining, anyNA, logical(1))]

Examine the Data

library(Hmisc)
## Loading required package: grid
## Loading required package: survival
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:caret':
## 
##     cluster
## 
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
library(ggplot2)

# Per-column summary of the training set (counts, missing values, distinct
# values, quantiles). Namespace-qualified so that a describe() from another
# attached package cannot be picked up by mistake.
Hmisc::describe(dfTraining)
## dfTraining 
## 
##  92  Variables      19622  Observations
## ---------------------------------------------------------------------------
## user_name 
##       n missing  unique 
##   19622       0       6 
## 
##           adelmo carlitos charles eurico jeremy pedro
## Frequency   3892     3112    3536   3070   3402  2610
## %             20       16      18     16     17    13
## ---------------------------------------------------------------------------
## raw_timestamp_part_1 
##         n   missing    unique      Info      Mean       .05       .10 
##     19622         0       837         1 1.323e+09 1.322e+09 1.322e+09 
##       .25       .50       .75       .90       .95 
## 1.323e+09 1.323e+09 1.323e+09 1.323e+09 1.323e+09 
## 
## lowest : 1322489605 1322489606 1322489607 1322489608 1322489609
## highest: 1323095077 1323095078 1323095079 1323095080 1323095081 
## ---------------------------------------------------------------------------
## raw_timestamp_part_2 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0   16783       1  500656   48389  100343  252912  496380 
##     .75     .90     .95 
##  751891  900367  950649 
## 
## lowest :    294    301    307    309    312
## highest: 998716 998741 998749 998750 998801 
## ---------------------------------------------------------------------------
## cvtd_timestamp 
##       n missing  unique 
##   19622       0      20 
## 
## lowest : 02/12/2011 13:32 02/12/2011 13:33 02/12/2011 13:34 02/12/2011 13:35 02/12/2011 14:56
## highest: 28/11/2011 14:14 28/11/2011 14:15 30/11/2011 17:10 30/11/2011 17:11 30/11/2011 17:12 
## ---------------------------------------------------------------------------
## new_window 
##       n missing  unique 
##   19622       0       2 
## 
## no (19216, 98%), yes (406, 2%) 
## ---------------------------------------------------------------------------
## num_window 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     858       1   430.6      44      88     222     424 
##     .75     .90     .95 
##     644     780     821 
## 
## lowest :   1   2   3   4   5, highest: 860 861 862 863 864 
## ---------------------------------------------------------------------------
## roll_belt 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    1330       1   64.41   -0.20    0.53    1.10  113.00 
##     .75     .90     .95 
##  123.00  129.00  139.00 
## 
## lowest : -28.9 -28.8 -28.6 -28.4 -28.3
## highest: 158.0 159.0 160.0 161.0 162.0 
## ---------------------------------------------------------------------------
## pitch_belt 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    1840       1  0.3053  -43.60  -42.10    1.76    5.28 
##     .75     .90     .95 
##   14.90   25.40   26.20 
## 
## lowest : -55.8 -54.9 -54.7 -54.4 -53.9
## highest:  59.9  60.0  60.1  60.2  60.3 
## ---------------------------------------------------------------------------
## yaw_belt 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    1957       1  -11.21   -93.5   -92.9   -88.3   -13.0 
##     .75     .90     .95 
##    12.9   165.0   168.0 
## 
## lowest : -180 -179 -178 -177 -176, highest:  175  176  177  178  179 
## ---------------------------------------------------------------------------
## total_accel_belt 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0      29    0.98   11.31       2       3       3      17 
##     .75     .90     .95 
##      18      20      21 
## 
## lowest :  0  1  2  3  4, highest: 25 26 27 28 29 
## ---------------------------------------------------------------------------
## kurtosis_roll_belt 
##       n missing  unique 
##   19622       0     397 
## 
## lowest :           -0.016850 -0.021024 -0.025513 -0.033935
## highest: 5.587755  5.681869  6.545935  7.004355  7.515290  
## ---------------------------------------------------------------------------
## kurtosis_picth_belt 
##       n missing  unique 
##   19622       0     317 
## 
## lowest :           -0.021887 -0.060755 -0.099173 -0.108371
## highest: 8.953960  9.042959  9.296951  9.804491  9.896970  
## ---------------------------------------------------------------------------
## kurtosis_yaw_belt 
##       n missing  unique 
##   19622       0       2 
## 
##  (19216, 98%), #DIV/0! (406, 2%) 
## ---------------------------------------------------------------------------
## skewness_roll_belt 
##       n missing  unique 
##   19622       0     395 
## 
## lowest :           -0.003095 -0.010002 -0.014020 -0.015465
## highest: 2.058296  2.097857  2.674649  2.713152  3.595369  
## ---------------------------------------------------------------------------
## skewness_roll_belt.1 
##       n missing  unique 
##   19622       0     338 
## 
## lowest :           -0.005928 -0.005960 -0.008391 -0.017954
## highest: 6.164414  6.708204  6.782330  6.855655  7.348469  
## ---------------------------------------------------------------------------
## skewness_yaw_belt 
##       n missing  unique 
##   19622       0       2 
## 
##  (19216, 98%), #DIV/0! (406, 2%) 
## ---------------------------------------------------------------------------
## max_yaw_belt 
##       n missing  unique 
##   19622       0      68 
## 
## lowest :      -0.1 -0.2 -0.3 -0.4, highest: 5.6  5.7  6.5  7.0  7.5  
## ---------------------------------------------------------------------------
## min_yaw_belt 
##       n missing  unique 
##   19622       0      68 
## 
## lowest :      -0.1 -0.2 -0.3 -0.4, highest: 5.6  5.7  6.5  7.0  7.5  
## ---------------------------------------------------------------------------
## amplitude_yaw_belt 
##       n missing  unique 
##   19622       0       4 
## 
##  (19216, 98%), #DIV/0! (10, 0%), 0.00 (12, 0%) 
## 0.0000 (384, 2%) 
## ---------------------------------------------------------------------------
## gyros_belt_x 
##         n   missing    unique      Info      Mean       .05       .10 
##     19622         0       140         1 -0.005592     -0.45     -0.39 
##       .25       .50       .75       .90       .95 
##     -0.03      0.03      0.11      0.16      0.21 
## 
## lowest : -1.04 -1.00 -0.98 -0.96 -0.95
## highest:  1.88  1.98  2.02  2.20  2.22 
## ---------------------------------------------------------------------------
## gyros_belt_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0      69    0.97 0.03959   -0.05   -0.03    0.00    0.02 
##     .75     .90     .95 
##    0.11    0.13    0.13 
## 
## lowest : -0.64 -0.53 -0.51 -0.48 -0.45
## highest:  0.51  0.56  0.61  0.63  0.64 
## ---------------------------------------------------------------------------
## gyros_belt_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     169       1 -0.1305   -0.49   -0.44   -0.20   -0.10 
##     .75     .90     .95 
##   -0.02    0.02    0.11 
## 
## lowest : -1.46 -1.35 -1.33 -1.30 -1.28
## highest:  1.41  1.44  1.51  1.61  1.62 
## ---------------------------------------------------------------------------
## accel_belt_x 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     164       1  -5.595     -42     -39     -21     -15 
##     .75     .90     .95 
##      -5      49      51 
## 
## lowest : -120  -83  -82  -81  -80, highest:   78   79   81   83   85 
## ---------------------------------------------------------------------------
## accel_belt_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     143       1   30.15      -1       1       3      35 
##     .75     .90     .95 
##      61      68      71 
## 
## lowest : -69 -65 -54 -41 -38, highest: 109 121 149 150 164 
## ---------------------------------------------------------------------------
## accel_belt_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     299       1  -72.59    -190    -175    -162    -152 
##     .75     .90     .95 
##      27      42      45 
## 
## lowest : -275 -269 -268 -266 -265, highest:  101  102  103  104  105 
## ---------------------------------------------------------------------------
## magnet_belt_x 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     327       1    55.6      -4       0       9      35 
##     .75     .90     .95 
##      59     167     173 
## 
## lowest : -52 -49 -48 -46 -45, highest: 474 476 479 481 485 
## ---------------------------------------------------------------------------
## magnet_belt_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     298       1   593.7   525.1   564.0   581.0   601.0 
##     .75     .90     .95 
##   610.0   631.0   635.0 
## 
## lowest : 354 359 360 363 365, highest: 666 667 668 669 673 
## ---------------------------------------------------------------------------
## magnet_belt_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     457       1  -345.5    -452    -431    -375    -320 
##     .75     .90     .95 
##    -306    -297    -291 
## 
## lowest : -623 -621 -620 -618 -616, highest:  284  286  287  289  293 
## ---------------------------------------------------------------------------
## roll_arm 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    2654    0.99   17.83 -117.00  -70.09  -31.78    0.00 
##     .75     .90     .95 
##   77.30  113.00  135.00 
## 
## lowest : -180 -178 -177 -176 -175, highest:  176  177  178  179  180 
## ---------------------------------------------------------------------------
## pitch_arm 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    3087    0.99  -4.612   -56.0   -42.6   -25.9     0.0 
##     .75     .90     .95 
##    11.2    34.6    51.1 
## 
## lowest : -88.8 -88.2 -87.9 -87.8 -87.7
## highest:  86.6  86.8  87.1  88.2  88.5 
## ---------------------------------------------------------------------------
## yaw_arm 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    2876    0.99 -0.6188 -128.00  -92.09  -43.10    0.00 
##     .75     .90     .95 
##   45.88  102.00  116.00 
## 
## lowest : -180 -179 -178 -177 -176, highest:  176  177  178  179  180 
## ---------------------------------------------------------------------------
## total_accel_arm 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0      66       1   25.51       8      11      17      27 
##     .75     .90     .95 
##      33      38      42 
## 
## lowest :  1  2  3  4  5, highest: 62 63 64 65 66 
## ---------------------------------------------------------------------------
## gyros_arm_x 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     643       1 0.04277   -3.48   -2.79   -1.33    0.08 
##     .75     .90     .95 
##    1.57    2.59    3.12 
## 
## lowest : -6.37 -6.36 -6.34 -6.13 -6.12
## highest:  4.70  4.74  4.78  4.82  4.87 
## ---------------------------------------------------------------------------
## gyros_arm_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     376       1 -0.2571   -1.56   -1.25   -0.80   -0.24 
##     .75     .90     .95 
##    0.14    0.85    1.27 
## 
## lowest : -3.44 -3.40 -3.37 -3.32 -3.29
## highest:  2.76  2.78  2.79  2.81  2.84 
## ---------------------------------------------------------------------------
## gyros_arm_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     248       1  0.2695   -0.67   -0.43   -0.07    0.23 
##     .75     .90     .95 
##    0.72    0.95    1.10 
## 
## lowest : -2.33 -2.28 -2.17 -2.13 -2.10
## highest:  2.66  2.69  2.95  2.99  3.02 
## ---------------------------------------------------------------------------
## accel_arm_x 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     777       1  -60.24    -289    -280    -242     -44 
##     .75     .90     .95 
##      84     150     245 
## 
## lowest : -404 -383 -377 -371 -367, highest:  430  431  434  435  437 
## ---------------------------------------------------------------------------
## accel_arm_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     537       1    32.6    -134    -102     -54      14 
##     .75     .90     .95 
##     139     173     199 
## 
## lowest : -318 -315 -302 -301 -286, highest:  296  297  299  303  308 
## ---------------------------------------------------------------------------
## accel_arm_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     792       1  -71.25    -340    -249    -143     -47 
##     .75     .90     .95 
##      23      75     116 
## 
## lowest : -636 -630 -629 -613 -612, highest:  239  242  245  271  292 
## ---------------------------------------------------------------------------
## magnet_arm_x 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    1339       1   191.7    -422    -403    -300     289 
##     .75     .90     .95 
##     637     728     744 
## 
## lowest : -584 -580 -579 -578 -576, highest:  777  778  779  780  782 
## ---------------------------------------------------------------------------
## magnet_arm_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     872       1   156.6    -200    -141      -9     202 
##     .75     .90     .95 
##     323     391     435 
## 
## lowest : -392 -386 -384 -381 -377, highest:  577  578  580  582  583 
## ---------------------------------------------------------------------------
## magnet_arm_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    1265       1   306.5  -420.0  -242.0   131.2   444.0 
##     .75     .90     .95 
##   545.0   597.0   620.0 
## 
## lowest : -597 -596 -595 -590 -586, highest:  685  687  690  693  694 
## ---------------------------------------------------------------------------
## kurtosis_roll_arm 
##       n missing  unique 
##   19622       0     330 
## 
## lowest :          -0.02438 -0.04190 -0.05051 -0.05695
## highest: 3.96713  4.66566  5.50673  6.21009  7.66917  
## ---------------------------------------------------------------------------
## kurtosis_picth_arm 
##       n missing  unique 
##   19622       0     328 
## 
## lowest :          -0.00484 -0.01311 -0.02967 -0.07394
## highest: 4.41716  6.25063  6.39832  7.79477  9.16615  
## ---------------------------------------------------------------------------
## kurtosis_yaw_arm 
##       n missing  unique 
##   19622       0     395 
## 
## lowest :          -0.01548 -0.01749 -0.02101 -0.04059
## highest: 5.43713  5.46450  50.00000 56.00000 6.97222  
## ---------------------------------------------------------------------------
## skewness_roll_arm 
##       n missing  unique 
##   19622       0     331 
## 
## lowest :          -0.00051 -0.00696 -0.01884 -0.03359
## highest: 1.71066  2.09387  2.41765  4.15709  4.39449  
## ---------------------------------------------------------------------------
## skewness_pitch_arm 
##       n missing  unique 
##   19622       0     328 
## 
## lowest :          -0.00184 -0.01185 -0.01247 -0.02063
## highest: 2.11434  2.16019  2.19479  2.65520  3.04295  
## ---------------------------------------------------------------------------
## skewness_yaw_arm 
##       n missing  unique 
##   19622       0     395 
## 
## lowest :          -0.00311 -0.00562 -0.00800 -0.01697
## highest: 1.82084  2.18029  4.46409  7.07107  7.48331  
## ---------------------------------------------------------------------------
## roll_dumbbell 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0   16523       1   23.84 -110.15  -99.51  -18.49   48.17 
##     .75     .90     .95 
##   67.61  102.74  116.28 
## 
## lowest : -153.7 -153.5 -152.8 -152.4 -152.0
## highest:  151.0  151.4  152.1  153.4  153.5 
## ---------------------------------------------------------------------------
## pitch_dumbbell 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0   16040       1  -10.78  -56.44  -51.70  -40.89  -20.96 
##     .75     .90     .95 
##   17.50   38.51   51.34 
## 
## lowest : -149.6 -148.5 -137.3 -134.7 -130.1
## highest:  127.9  129.5  129.8  137.0  149.4 
## ---------------------------------------------------------------------------
## yaw_dumbbell 
##        n  missing   unique     Info     Mean      .05      .10      .25 
##    19622        0    16381        1    1.674 -102.418  -95.474  -77.644 
##      .50      .75      .90      .95 
##   -3.324   79.643  121.218  129.065 
## 
## lowest : -150.9 -148.8 -147.1 -146.2 -144.3
## highest:  154.1  154.2  154.5  154.8  155.0 
## ---------------------------------------------------------------------------
## kurtosis_roll_dumbbell 
##       n missing  unique 
##   19622       0     398 
## 
## lowest :         -0.0035 -0.0073 -0.0115 -0.0262
## highest: 6.1973  7.4175  7.5633  7.9609  8.9336  
## ---------------------------------------------------------------------------
## kurtosis_picth_dumbbell 
##       n missing  unique 
##   19622       0     401 
## 
## lowest :         -0.0163 -0.0233 -0.0280 -0.0308
## highest: 5.7918  5.8136  5.9003  55.6279 9.5485  
## ---------------------------------------------------------------------------
## kurtosis_yaw_dumbbell 
##       n missing  unique 
##   19622       0       2 
## 
##  (19216, 98%), #DIV/0! (406, 2%) 
## ---------------------------------------------------------------------------
## skewness_roll_dumbbell 
##       n missing  unique 
##   19622       0     401 
## 
## lowest :         -0.0082 -0.0096 -0.0172 -0.0224
## highest: 1.5574  1.5964  1.7210  1.9255  1.9579  
## ---------------------------------------------------------------------------
## skewness_pitch_dumbbell 
##       n missing  unique 
##   19622       0     402 
## 
## lowest :         -0.0053 -0.0084 -0.0166 -0.0452
## highest: 1.7872  1.8951  2.0773  2.5456  3.7687  
## ---------------------------------------------------------------------------
## skewness_yaw_dumbbell 
##       n missing  unique 
##   19622       0       2 
## 
##  (19216, 98%), #DIV/0! (406, 2%) 
## ---------------------------------------------------------------------------
## max_yaw_dumbbell 
##       n missing  unique 
##   19622       0      73 
## 
## lowest :      -0.1 -0.2 -0.3 -0.4, highest: 6.2  7.4  7.6  8.0  8.9  
## ---------------------------------------------------------------------------
## min_yaw_dumbbell 
##       n missing  unique 
##   19622       0      73 
## 
## lowest :      -0.1 -0.2 -0.3 -0.4, highest: 6.2  7.4  7.6  8.0  8.9  
## ---------------------------------------------------------------------------
## amplitude_yaw_dumbbell 
##       n missing  unique 
##   19622       0       3 
## 
##  (19216, 98%), #DIV/0! (5, 0%), 0.00 (401, 2%) 
## ---------------------------------------------------------------------------
## total_accel_dumbbell 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0      43       1   13.72       2       2       4      10 
##     .75     .90     .95 
##      19      30      31 
## 
## lowest :  0  1  2  3  4, highest: 38 39 40 42 58 
## ---------------------------------------------------------------------------
## gyros_dumbbell_x 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     241       1  0.1611   -0.37   -0.21   -0.03    0.13 
##     .75     .90     .95 
##    0.35    0.59    0.83 
## 
## lowest : -204.00   -1.99   -1.94   -1.86   -1.85
## highest:    2.07    2.14    2.17    2.20    2.22 
## ---------------------------------------------------------------------------
## gyros_dumbbell_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     278       1 0.04606   -0.80   -0.48   -0.14    0.03 
##     .75     .90     .95 
##    0.21    0.56    0.92 
## 
## lowest : -2.10 -2.07 -2.06 -2.04 -2.01
## highest:  2.63  2.71  2.73  4.37 52.00 
## ---------------------------------------------------------------------------
## gyros_dumbbell_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     206       1  -0.129   -0.67   -0.49   -0.31   -0.13 
##     .75     .90     .95 
##    0.03    0.20    0.33 
## 
## lowest :  -2.38  -2.30  -2.08  -2.00  -1.95
## highest:   1.61   1.67   1.72   1.87 317.00 
## ---------------------------------------------------------------------------
## accel_dumbbell_x 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     425       1  -28.62    -145    -140     -50      -8 
##     .75     .90     .95 
##      11      29      53 
## 
## lowest : -419 -237 -236 -235 -234, highest:  217  219  224  234  235 
## ---------------------------------------------------------------------------
## accel_dumbbell_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     466       1   52.63   -61.0   -41.0    -8.0    41.5 
##     .75     .90     .95 
##   111.0   161.0   194.0 
## 
## lowest : -189 -182 -181 -179 -176, highest:  299  300  302  310  315 
## ---------------------------------------------------------------------------
## accel_dumbbell_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     410       1  -38.32    -206    -197    -142      -1 
##     .75     .90     .95 
##      38      88      98 
## 
## lowest : -334 -319 -284 -273 -272, highest:  314  315  316  317  318 
## ---------------------------------------------------------------------------
## magnet_dumbbell_x 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    1128       1  -328.5  -579.0  -564.0  -535.0  -479.0 
##     .75     .90     .95 
##  -304.0   445.9   520.0 
## 
## lowest : -643 -639 -638 -637 -635, highest:  579  582  583  584  592 
## ---------------------------------------------------------------------------
## magnet_dumbbell_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     844       1     221    -562    -516     231     311 
##     .75     .90     .95 
##     390     500     551 
## 
## lowest : -3600  -744  -742  -741  -740
## highest:   629   630   631   632   633 
## ---------------------------------------------------------------------------
## magnet_dumbbell_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     676       1   46.05    -152     -93     -45      13 
##     .75     .90     .95 
##      95     294     345 
## 
## lowest : -262 -250 -249 -248 -245, highest:  442  443  447  451  452 
## ---------------------------------------------------------------------------
## roll_forearm 
##         n   missing    unique      Info      Mean       .05       .10 
##     19622         0      2176      0.99     33.83 -175.0000 -154.0000 
##       .25       .50       .75       .90       .95 
##   -0.7375   21.7000  140.0000  162.0000  174.0000 
## 
## lowest : -180 -179 -178 -177 -176, highest:  176  177  178  179  180 
## ---------------------------------------------------------------------------
## pitch_forearm 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    2915    0.99   10.71  -51.40  -19.20    0.00    9.24 
##     .75     .90     .95 
##   28.40   46.10   54.00 
## 
## lowest : -72.5 -72.4 -72.1 -71.6 -71.4
## highest:  87.5  87.9  88.4  88.7  89.8 
## ---------------------------------------------------------------------------
## yaw_forearm 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    1991    0.99   19.21  -153.0  -138.0   -68.6     0.0 
##     .75     .90     .95 
##   110.0   151.0   163.0 
## 
## lowest : -180 -179 -178 -177 -176, highest:  176  177  178  179  180 
## ---------------------------------------------------------------------------
## kurtosis_roll_forearm 
##       n missing  unique 
##   19622       0     322 
## 
## lowest :         -0.0227 -0.0359 -0.0567 -0.0781
## highest: 3.4833  4.3514  4.5821  40.0597 8.8039  
## ---------------------------------------------------------------------------
## kurtosis_picth_forearm 
##       n missing  unique 
##   19622       0     323 
## 
## lowest :         -0.0073 -0.0442 -0.0489 -0.0523
## highest: 8.8271  8.8831  9.1693  9.5584  9.9138  
## ---------------------------------------------------------------------------
## kurtosis_yaw_forearm 
##       n missing  unique 
##   19622       0       2 
## 
##  (19216, 98%), #DIV/0! (406, 2%) 
## ---------------------------------------------------------------------------
## skewness_roll_forearm 
##       n missing  unique 
##   19622       0     323 
## 
## lowest :         -0.0004 -0.0013 -0.0063 -0.0088
## highest: 1.2817  1.8676  1.9796  2.6579  5.8557  
## ---------------------------------------------------------------------------
## skewness_pitch_forearm 
##       n missing  unique 
##   19622       0     319 
## 
## lowest :         -0.0113 -0.0131 -0.0405 -0.0478
## highest: 2.4216  2.5226  2.7813  3.5998  4.4641  
## ---------------------------------------------------------------------------
## skewness_yaw_forearm 
##       n missing  unique 
##   19622       0       2 
## 
##  (19216, 98%), #DIV/0! (406, 2%) 
## ---------------------------------------------------------------------------
## max_yaw_forearm 
##       n missing  unique 
##   19622       0      45 
## 
## lowest :      -0.1 -0.2 -0.3 -0.4, highest: 3.5  4.4  4.6  40.1 8.8  
## ---------------------------------------------------------------------------
## min_yaw_forearm 
##       n missing  unique 
##   19622       0      45 
## 
## lowest :      -0.1 -0.2 -0.3 -0.4, highest: 3.5  4.4  4.6  40.1 8.8  
## ---------------------------------------------------------------------------
## amplitude_yaw_forearm 
##       n missing  unique 
##   19622       0       3 
## 
##  (19216, 98%), #DIV/0! (84, 0%), 0.00 (322, 2%) 
## ---------------------------------------------------------------------------
## total_accel_forearm 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0      70       1   34.72      15      23      29      36 
##     .75     .90     .95 
##      41      47      50 
## 
## lowest :   0   1   2   3   4, highest:  68  73  78  79 108 
## ---------------------------------------------------------------------------
## gyros_forearm_x 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     298       1   0.158   -0.75   -0.55   -0.22    0.05 
##     .75     .90     .95 
##    0.56    1.04    1.24 
## 
## lowest : -22.00  -4.95  -3.36  -3.08  -2.99
## highest:   3.10   3.26   3.48   3.52   3.97 
## ---------------------------------------------------------------------------
## gyros_forearm_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     741       1 0.07517   -3.61   -2.94   -1.46    0.03 
##     .75     .90     .95 
##    1.62    2.83    3.50 
## 
## lowest :  -7.02  -6.65  -6.62  -6.54  -6.52
## highest:   6.09   6.10   6.12   6.13 311.00 
## ---------------------------------------------------------------------------
## gyros_forearm_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     307       1  0.1512   -0.82   -0.57   -0.18    0.08 
##     .75     .90     .95 
##    0.49    0.89    1.13 
## 
## lowest :  -8.09  -7.94  -6.99  -5.55  -4.28
## highest:   3.35   4.04   4.10   4.31 231.00 
## ---------------------------------------------------------------------------
## accel_forearm_x 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     794       1  -61.65    -392    -331    -178     -57 
##     .75     .90     .95 
##      76     188     223 
## 
## lowest : -498 -496 -487 -479 -477, highest:  370  375  381  389  477 
## ---------------------------------------------------------------------------
## accel_forearm_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    1003       1   163.7    -227    -140      57     201 
##     .75     .90     .95 
##     312     395     436 
## 
## lowest : -632 -595 -585 -537 -496, highest:  588  589  590  591  923 
## ---------------------------------------------------------------------------
## accel_forearm_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0     580       1  -55.29  -222.0  -209.0  -182.0   -39.0 
##     .75     .90     .95 
##    26.0   175.9   199.0 
## 
## lowest : -446 -410 -391 -386 -381, highest:  275  277  285  287  291 
## ---------------------------------------------------------------------------
## magnet_forearm_x 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    1524       1  -312.6    -700    -684    -616    -378 
##     .75     .90     .95 
##     -73     128     379 
## 
## lowest : -1280 -1270 -1260 -1250 -1240
## highest:   660   661   663   666   672 
## ---------------------------------------------------------------------------
## magnet_forearm_y 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    1872       1   380.1  -632.9  -472.9     2.0   591.0 
##     .75     .90     .95 
##   737.0   797.0   981.0 
## 
## lowest : -896 -892 -890 -885 -882, highest: 1430 1440 1450 1460 1480 
## ---------------------------------------------------------------------------
## magnet_forearm_z 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##   19622       0    1683       1   393.6  -453.0  -153.9   191.0   511.0 
##     .75     .90     .95 
##   653.0   736.0   799.0 
## 
## lowest : -973 -966 -964 -963 -962, highest: 1040 1050 1070 1080 1090 
## ---------------------------------------------------------------------------
## classe 
##       n missing  unique 
##   19622       0       5 
## 
##              A    B    C    D    E
## Frequency 5580 3797 3422 3216 3607
## %           28   19   17   16   18
## ---------------------------------------------------------------------------
# Summarise the `classe` column on its own to see how its five levels are
# distributed. Namespace-qualified to avoid describe() being masked.
Hmisc::describe(dfTraining$classe)
## dfTraining$classe 
##       n missing  unique 
##   19622       0       5 
## 
##              A    B    C    D    E
## Frequency 5580 3797 3422 3216 3607
## %           28   19   17   16   18
# Draw one histogram per column of dfTraining for a quick look at each
# variable's distribution; the x-axis is labelled with the column name.
colNames <- colnames(dfTraining)

# seq_along() rather than 1:ncol(): the latter yields c(1, 0) for a
# zero-column data frame and would then index columns that do not exist.
for (i in seq_along(colNames)) {
  # print() is required because ggplot objects are not auto-printed in a loop.
  print(qplot(x = dfTraining[, i], data = dfTraining, geom = "histogram") +
          labs(x = colNames[i]))
}

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

We first see that some of our predictors contain missing data. We might want to impute these values before we try to predict our classe variable. We also see that our data contains some outliers, which suggests we may want to transform some predictors. A log transform may be a poor choice here, as many predictors have zero or negative values.

Clean The Data

Imputation

# library(mice)
# 
# temp <- dfTraining[,10:20]
# 
# impTrain <- mice(temp,m=2)

Scale outliers in data

We have to be careful here, as we have many values centered at zero. We also do not want to introduce negative values to our predictors unless we are sure it makes sense.

# dfTemp <- sapply(dfTraining,is.numeric)
# 
# dfTemp <- dfTraining[,dfTemp]
# 
# dfTemp <- scale(dfTemp,center=TRUE,scale=TRUE)
# 
# colnames(dfTemp) <- paste0(colnames(dfTemp),"_std")
# 
# x <- cbind(dfTraining,dfTemp)

Examine the relationship between our dependent and independent variables

# Plot every column of dfTraining against the outcome (classe) to eyeball
# which variables separate the classes.
colNames <- colnames(dfTraining)

# seq_along() rather than 1:ncol(): safe when there are no columns.
for (i in seq_along(colNames)) {
  # classe is mapped as a column of `data` instead of via dfTraining$classe,
  # so the aesthetic stays tied to the data frame handed to ggplot().
  # The y column is still picked by position because it changes each pass.
  print(ggplot(data = dfTraining, aes(x = classe, y = dfTraining[[i]])) +
    geom_point() +
    labs(y = colNames[i], x = "classe"))
}

Split Data into Training/Test/Validation

# Two-stage split on classe.
# Stage 1: 70% of the rows form the model-building pool; the remaining 30%
# are held out as `validation`.
inBuild <- createDataPartition(dfTraining$classe, p = 0.7, list = FALSE)
buildData <- dfTraining[inBuild, ]
validation <- dfTraining[-inBuild, ]

# Stage 2: 60% of the pool becomes `training`, the other 40% `testing`.
inTrain <- createDataPartition(buildData$classe, p = 0.6, list = FALSE)
training <- buildData[inTrain, ]
testing <- buildData[-inTrain, ]

Variable Selection

# library(Hmisc)
# 
# corr <- rcorr(training,type="pearson")

# Filter-based variable importance: one row per predictor, one column per
# class. The outcome is dropped by position, which relies on classe being
# the last column of `training`.
x <- filterVarImp(
  x = training[, -ncol(training)],
  y = training[["classe"]]
)

print(x)
##                                 A         B         C         D         E
## user_name               0.5134853 0.5397004 0.5309608 0.5397004 0.5294021
## raw_timestamp_part_1    0.6268866 0.6268866 0.6096734 0.6054419 0.6248057
## raw_timestamp_part_2    0.5073593 0.5132744 0.5092069 0.5078197 0.5132744
## cvtd_timestamp          0.5769398 0.6052389 0.5643904 0.6024619 0.6052389
## new_window              0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## num_window              0.6636008 0.6636008 0.6425493 0.5596084 0.6466329
## roll_belt               0.6381112 0.6246830 0.6299681 0.6444705 0.6444705
## pitch_belt              0.5427825 0.5176937 0.5369733 0.5274233 0.5427825
## yaw_belt                0.5658871 0.5666952 0.5552093 0.5583397 0.5666952
## total_accel_belt        0.5734829 0.5453780 0.5349146 0.5453780 0.5734829
## kurtosis_roll_belt      0.4996725 0.5019988 0.5019988 0.5008152 0.5014387
## kurtosis_picth_belt     0.4997124 0.5020494 0.5020494 0.5008486 0.5014252
## kurtosis_yaw_belt       0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## skewness_roll_belt      0.4996553 0.5020407 0.5020407 0.5008783 0.5014892
## skewness_roll_belt.1    0.4996490 0.5019585 0.5019585 0.5008291 0.5014294
## skewness_yaw_belt       0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## max_yaw_belt            0.4996631 0.5019851 0.5019851 0.5008166 0.5014290
## min_yaw_belt            0.4996631 0.5019851 0.5019851 0.5008166 0.5014290
## amplitude_yaw_belt      0.4997005 0.5020082 0.5020082 0.5008243 0.5014484
## gyros_belt_x            0.5182420 0.5169422 0.5120470 0.5182420 0.5093889
## gyros_belt_y            0.5109178 0.5065494 0.5152846 0.5145634 0.5152846
## gyros_belt_z            0.5705812 0.5526004 0.5562532 0.5707359 0.5707359
## accel_belt_x            0.5530465 0.5531390 0.5568565 0.5657906 0.5657906
## accel_belt_y            0.5233597 0.5408436 0.5362591 0.5248257 0.5408436
## accel_belt_z            0.6276412 0.5547544 0.5601100 0.5403503 0.6276412
## magnet_belt_x           0.5751567 0.5779000 0.5745523 0.5515748 0.5779000
## magnet_belt_y           0.7042735 0.6846954 0.6864964 0.6961019 0.7042735
## magnet_belt_z           0.6610409 0.6570988 0.6548968 0.6678376 0.6678376
## roll_arm                0.6135021 0.6135021 0.5796561 0.5711926 0.5659869
## pitch_arm               0.6451702 0.5590683 0.6204978 0.6256854 0.6451702
## yaw_arm                 0.5469351 0.5469351 0.5366485 0.5306448 0.5217757
## total_accel_arm         0.6270318 0.5963430 0.6034105 0.6270318 0.6060178
## gyros_arm_x             0.5225574 0.5225574 0.5215749 0.5094470 0.5211644
## gyros_arm_y             0.5327728 0.5327728 0.5229546 0.5285199 0.5316335
## gyros_arm_z             0.5127819 0.5132959 0.5091482 0.5086250 0.5132959
## accel_arm_x             0.7252781 0.6502704 0.6587791 0.7252781 0.6844558
## accel_arm_y             0.5800096 0.5471228 0.5689480 0.5740486 0.5800096
## accel_arm_z             0.6088139 0.5878772 0.6088139 0.5874760 0.5551867
## magnet_arm_x            0.7388055 0.6576832 0.6812789 0.7388055 0.6941739
## magnet_arm_y            0.7342272 0.6158267 0.6818645 0.7342272 0.7033880
## magnet_arm_z            0.6622455 0.6622455 0.6262355 0.6366404 0.6494513
## kurtosis_roll_arm       0.4996742 0.5020306 0.5020306 0.5008642 0.5014921
## kurtosis_picth_arm      0.4996493 0.5019814 0.5019814 0.5008549 0.5014979
## kurtosis_yaw_arm        0.4996244 0.5019918 0.5019918 0.5008236 0.5014246
## skewness_roll_arm       0.4996612 0.5020337 0.5020337 0.5008804 0.5014805
## skewness_pitch_arm      0.4996573 0.5019953 0.5019953 0.5008734 0.5014701
## skewness_yaw_arm        0.4996846 0.5020158 0.5020158 0.5008904 0.5014598
## roll_dumbbell           0.6588673 0.6961325 0.7602064 0.7602064 0.6763427
## pitch_dumbbell          0.6579801 0.7119805 0.7119805 0.6792556 0.6373775
## yaw_dumbbell            0.5543138 0.6190584 0.6190584 0.5659446 0.5874803
## kurtosis_roll_dumbbell  0.4996431 0.5019979 0.5019979 0.5008551 0.5014693
## kurtosis_picth_dumbbell 0.4996891 0.5019892 0.5019892 0.5008380 0.5014383
## kurtosis_yaw_dumbbell   0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## skewness_roll_dumbbell  0.4996993 0.5020136 0.5020136 0.5008403 0.5014577
## skewness_pitch_dumbbell 0.4996849 0.5019609 0.5019609 0.5008268 0.5014060
## skewness_yaw_dumbbell   0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## max_yaw_dumbbell        0.4996612 0.5020178 0.5020178 0.5008463 0.5014621
## min_yaw_dumbbell        0.4996612 0.5020178 0.5020178 0.5008463 0.5014621
## amplitude_yaw_dumbbell  0.4996913 0.5020014 0.5020014 0.5008380 0.5014478
## total_accel_dumbbell    0.5483854 0.6109927 0.5745746 0.6109927 0.5988302
## gyros_dumbbell_x        0.5232314 0.5263436 0.5305536 0.5305536 0.4933295
## gyros_dumbbell_y        0.5412461 0.5496939 0.5641117 0.5641117 0.5534297
## gyros_dumbbell_z        0.5162821 0.5162821 0.5102417 0.5010332 0.5093901
## accel_dumbbell_x        0.6685349 0.6685349 0.6646330 0.6373326 0.6231495
## accel_dumbbell_y        0.6127719 0.6331695 0.6331695 0.6300618 0.5852254
## accel_dumbbell_z        0.6247845 0.6247845 0.6169780 0.5756091 0.5896521
## magnet_dumbbell_x       0.7000778 0.7000778 0.6922764 0.6537327 0.6651159
## magnet_dumbbell_y       0.6425914 0.7026177 0.7026177 0.6547110 0.6659260
## magnet_dumbbell_z       0.6699102 0.5822907 0.6699102 0.6210012 0.6648677
## roll_forearm            0.6049963 0.5772592 0.6049963 0.5790523 0.5591335
## pitch_forearm           0.7978269 0.6874454 0.7071158 0.7978269 0.7083352
## yaw_forearm             0.5183144 0.5654765 0.5786503 0.5786503 0.5669315
## kurtosis_roll_forearm   0.4997041 0.5019812 0.5019812 0.5008398 0.5014273
## kurtosis_picth_forearm  0.4997084 0.5020099 0.5020099 0.5008009 0.5014492
## kurtosis_yaw_forearm    0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## skewness_roll_forearm   0.4996947 0.5019737 0.5019737 0.5007879 0.5014352
## skewness_pitch_forearm  0.4996704 0.5019790 0.5019790 0.5008477 0.5014385
## skewness_yaw_forearm    0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## max_yaw_forearm         0.4997057 0.5019827 0.5019827 0.5008380 0.5014255
## min_yaw_forearm         0.4997057 0.5019827 0.5019827 0.5008380 0.5014255
## amplitude_yaw_forearm   0.4996987 0.5020182 0.5020182 0.5008542 0.5014695
## total_accel_forearm     0.6053719 0.5864724 0.5706206 0.5927630 0.6053719
## gyros_forearm_x         0.5157432 0.5204091 0.5286898 0.5286898 0.5251298
## gyros_forearm_y         0.5125610 0.5125610 0.5101955 0.5075451 0.5082118
## gyros_forearm_z         0.5196666 0.5232374 0.5232374 0.5208738 0.5125013
## accel_forearm_x         0.7538789 0.6626486 0.7053762 0.7538789 0.6621246
## accel_forearm_y         0.5893872 0.5755231 0.5893872 0.5826360 0.5604263
## accel_forearm_z         0.5301225 0.5363562 0.5363562 0.5327640 0.5315553
## magnet_forearm_x        0.7203770 0.6551242 0.6213740 0.7203770 0.6420153
## magnet_forearm_y        0.6240376 0.5800635 0.6466061 0.6466061 0.6117382
## magnet_forearm_z        0.5526250 0.5402176 0.6023436 0.6023436 0.5880043

Multinomial Regression

library(caret)
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
# Register a parallel backend with two cores for caret's resampling.
registerDoParallel(cores = 2)

# Keep only the numeric columns of the training set, then re-attach the
# outcome. vapply() guarantees a logical mask (sapply() can silently change
# its return type), and drop = FALSE keeps a data frame even if only one
# column qualifies.
# NOTE(review): the numeric set still includes bookkeeping columns such as
# raw_timestamp_part_1/2 and num_window -- confirm they are intended
# predictors.
dfTemp <- training[, vapply(training, is.numeric, logical(1)), drop = FALSE]
dfTemp$classe <- training$classe

# 5-fold cross-validation, repeated 5 times.
fitControl <- trainControl(method = "repeatedcv",
                           number = 5,
                           repeats = 5)

# Multinomial regression on PCA-transformed predictors.
lm1 <- train(classe ~ .,
             method = "multinom",
             trControl = fitControl,
             preProcess = "pca",
             data = dfTemp)
## Loading required package: nnet
## # weights:  140 (108 variable)
## initial  value 13268.206150 
## iter  10 value 10128.419146
## iter  20 value 9921.604378
## iter  30 value 9905.456318
## iter  40 value 9702.675733
## iter  50 value 9640.053564
## iter  60 value 9599.426936
## iter  70 value 9595.803639
## iter  80 value 9591.824567
## iter  90 value 9590.310992
## iter 100 value 9589.986044
## final  value 9589.986044 
## stopped after 100 iterations
# In-sample check: score the multinomial model on the rows it was fitted to.
lm1Pred <- predict(lm1, newdata = training)
confusionMatrix(data = lm1Pred, reference = training$classe)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 1614  319  365  118  164
##          B  172  738  145  158  257
##          C  190  232  739  196  182
##          D  301  145  133  712  161
##          E   67  161   56  168  751
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5524          
##                  95% CI : (0.5416, 0.5632)
##     No Information Rate : 0.2843          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4322          
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            0.6886  0.46270  0.51391  0.52663   0.4957
## Specificity            0.8363  0.88991  0.88246  0.89263   0.9328
## Pos Pred Value         0.6256  0.50204  0.48018  0.49036   0.6243
## Neg Pred Value         0.8711  0.87349  0.89575  0.90577   0.8915
## Prevalence             0.2843  0.19347  0.17443  0.16400   0.1838
## Detection Rate         0.1958  0.08952  0.08964  0.08637   0.0911
## Detection Prevalence   0.3130  0.17831  0.18668  0.17613   0.1459
## Balanced Accuracy      0.7624  0.67630  0.69818  0.70963   0.7143
# Disabled: inspect the fitted multinomial model.
# NOTE(review): `lm` below is base R's lm() function, not the fitted object;
# this would need to read `lm1$finalModel` before being re-enabled.
#
# logReg <- lm$finalModel
# 
# print(logReg)

The performance of our multinomial regression is very poor to begin with (training-set accuracy of only about 0.55, i.e. an in-sample error rate of roughly 0.45). This suggests trying a non-linear approach such as Random Forest.

Random Forest

library(caret)
library(doParallel)
# Register a parallel backend with two cores for caret's resampling.
registerDoParallel(cores = 2)

# Keep only the numeric columns of the training set, then re-attach the
# outcome. vapply() guarantees a logical mask (sapply() can silently change
# its return type), and drop = FALSE keeps a data frame even if only one
# column qualifies.
dfTemp <- training[, vapply(training, is.numeric, logical(1)), drop = FALSE]
dfTemp$classe <- training$classe
#dfTemp$max_yaw_dumbbell <- training$max_yaw_dumbbell

# 3-fold cross-validation, repeated 3 times.
fitControl <- trainControl(method = "repeatedcv",
                           number = 3,
                           repeats = 3)

# Random forest on PCA-transformed predictors.
rf1 <- train(classe ~ .,
             method = "rf",
             trControl = fitControl,
             preProcess = "pca",
             data = dfTemp)
## Loading required package: randomForest
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## 
## The following object is masked from 'package:Hmisc':
## 
##     combine
# Pull out the underlying randomForest fit and print its summary
# (OOB error estimate and confusion matrix).
finMod2 <- rf1[["finalModel"]]
print(finMod2)
## 
## Call:
##  randomForest(x = x, y = y, mtry = param$mtry) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 3.91%
## Confusion matrix:
##      A    B    C    D    E class.error
## A 2305    9    9   14    7  0.01663823
## B   50 1496   40    3    6  0.06206897
## C    6   40 1368   21    3  0.04867872
## D    5    4   65 1273    5  0.05843195
## E    2   10   11   12 1480  0.02310231
# In-sample check: score the random forest on the rows it was fitted to.
rf1Pred <- predict(rf1, newdata = training)
confusionMatrix(data = rf1Pred, reference = training$classe)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 2344    0    0    0    0
##          B    0 1595    0    0    0
##          C    0    0 1438    0    0
##          D    0    0    0 1352    0
##          E    0    0    0    0 1515
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9996, 1)
##     No Information Rate : 0.2843     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            1.0000   1.0000   1.0000    1.000   1.0000
## Specificity            1.0000   1.0000   1.0000    1.000   1.0000
## Pos Pred Value         1.0000   1.0000   1.0000    1.000   1.0000
## Neg Pred Value         1.0000   1.0000   1.0000    1.000   1.0000
## Prevalence             0.2843   0.1935   0.1744    0.164   0.1838
## Detection Rate         0.2843   0.1935   0.1744    0.164   0.1838
## Detection Prevalence   0.2843   0.1935   0.1744    0.164   0.1838
## Balanced Accuracy      1.0000   1.0000   1.0000    1.000   1.0000
# Held-out check: score the random forest on the validation split.
rf1Pred <- predict(rf1, newdata = validation)
confusionMatrix(data = rf1Pred, reference = validation$classe)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 1648   26    1    4    0
##          B   14 1081   43    5    1
##          C    3   29  970   48    7
##          D    9    1    9  901   12
##          E    0    2    3    6 1062
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9621          
##                  95% CI : (0.9569, 0.9668)
##     No Information Rate : 0.2845          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9521          
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            0.9845   0.9491   0.9454   0.9346   0.9815
## Specificity            0.9926   0.9867   0.9821   0.9937   0.9977
## Pos Pred Value         0.9815   0.9449   0.9177   0.9667   0.9897
## Neg Pred Value         0.9938   0.9878   0.9884   0.9873   0.9958
## Prevalence             0.2845   0.1935   0.1743   0.1638   0.1839
## Detection Rate         0.2800   0.1837   0.1648   0.1531   0.1805
## Detection Prevalence   0.2853   0.1944   0.1796   0.1584   0.1823
## Balanced Accuracy      0.9886   0.9679   0.9638   0.9642   0.9896

Pick our model

Now that we have evaluated our two models on our training and validation set, it is time to see how well they both generalize to our testing set. This will help us decide which model we choose for the programming part of this assignment.

# Random forest: accuracy on the testing split.
rf1Pred <- predict(rf1, newdata = testing)
confusionMatrix(data = rf1Pred, reference = testing$classe)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 1528   25    2    1    1
##          B   22 1006   42    4    3
##          C    5   26  902   49    7
##          D    5    3   10  846    7
##          E    2    3    2    0  992
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9601          
##                  95% CI : (0.9546, 0.9652)
##     No Information Rate : 0.2844          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9496          
##  Mcnemar's Test P-Value : 3.371e-06       
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            0.9782   0.9464   0.9415   0.9400   0.9822
## Specificity            0.9926   0.9840   0.9808   0.9946   0.9984
## Pos Pred Value         0.9814   0.9341   0.9120   0.9713   0.9930
## Neg Pred Value         0.9914   0.9871   0.9876   0.9883   0.9960
## Prevalence             0.2844   0.1935   0.1744   0.1638   0.1839
## Detection Rate         0.2782   0.1831   0.1642   0.1540   0.1806
## Detection Prevalence   0.2835   0.1961   0.1800   0.1586   0.1819
## Balanced Accuracy      0.9854   0.9652   0.9612   0.9673   0.9903
# Multinomial logit: accuracy on the testing split.
lm1Pred <- predict(lm1, newdata = testing)
confusionMatrix(data = lm1Pred, reference = testing$classe)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 1090  224  224   90  122
##          B  126  472  111  118  181
##          C  122  165  487  126  121
##          D  186  100   87  444   91
##          E   38  102   49  122  495
## 
## Overall Statistics
##                                           
##                Accuracy : 0.544           
##                  95% CI : (0.5307, 0.5572)
##     No Information Rate : 0.2844          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4208          
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            0.6978  0.44403  0.50835  0.49333  0.49010
## Specificity            0.8321  0.87901  0.88225  0.89898  0.93063
## Pos Pred Value         0.6229  0.46825  0.47698  0.48899  0.61414
## Neg Pred Value         0.8739  0.86823  0.89468  0.90055  0.89012
## Prevalence             0.2844  0.19352  0.17440  0.16384  0.18387
## Detection Rate         0.1984  0.08593  0.08866  0.08083  0.09011
## Detection Prevalence   0.3186  0.18351  0.18587  0.16530  0.14673
## Balanced Accuracy      0.7650  0.66152  0.69530  0.69616  0.71036

Our final results show that our random forest model not only provides better accuracy than the multinomial model (0.9601 versus 0.544 on the testing set) but also generalizes better to our test set.